From 17e89f8169b1f05dd8538344f4743535da412c5e Mon Sep 17 00:00:00 2001 From: "arun.sharma@intel.com[kaf24]" Date: Sat, 28 May 2005 08:53:55 +0000 Subject: [PATCH] bitkeeper revision 1.1577 (429831a31IZvEeEnalH3oKUTY7jV7A) [PATCH] vmx-worldswitch-1-to-1.patch Fix VMX world switch to use 1:1 page tables when the guest has paging disabled. Also do a printk instead of VMX_DBG_LOG() anytime we crash a domain. Signed-off-by: Arun Sharma <arun.sharma@intel.com> --- tools/libxc/xc_vmx_build.c | 2 +- xen/arch/x86/domain.c | 1 - xen/arch/x86/vmx.c | 58 +++++++++++++++++----------------- xen/arch/x86/vmx_io.c | 2 +- xen/arch/x86/vmx_platform.c | 10 ++++-- xen/arch/x86/vmx_vmcs.c | 2 +- xen/arch/x86/x86_32/traps.c | 8 ++++- xen/include/asm-x86/shadow.h | 7 ++-- xen/include/asm-x86/vmx.h | 11 ++++++- xen/include/asm-x86/vmx_vmcs.h | 1 - 10 files changed, 61 insertions(+), 41 deletions(-) diff --git a/tools/libxc/xc_vmx_build.c b/tools/libxc/xc_vmx_build.c index 360f375744..5cbd9b8577 100644 --- a/tools/libxc/xc_vmx_build.c +++ b/tools/libxc/xc_vmx_build.c @@ -10,7 +10,7 @@ #include #include "linux_boot_params.h" -#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED) +#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER) #define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER) #define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK) diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c index 30795b5831..1f42bd61ba 100644 --- a/xen/arch/x86/domain.c +++ b/xen/arch/x86/domain.c @@ -339,7 +339,6 @@ static int vmx_final_setup_guest( } ed->arch.schedule_tail = arch_vmx_do_launch; - clear_bit(VMX_CPU_STATE_PG_ENABLED, &ed->arch.arch_vmx.cpu_state); #if defined (__i386) ed->arch.arch_vmx.vmx_platform.real_mode_data = diff --git a/xen/arch/x86/vmx.c b/xen/arch/x86/vmx.c index ac6dc6fde1..3c649b1a7a 100644 --- a/xen/arch/x86/vmx.c +++ b/xen/arch/x86/vmx.c @@ -122,7 +122,6 @@ static void inline __update_guest_eip(unsigned long inst_len) static int 
vmx_do_page_fault(unsigned long va, struct cpu_user_regs *regs) { - struct exec_domain *ed = current; unsigned long eip; l1_pgentry_t gpte; unsigned long gpa; /* FIXME: PAE */ @@ -137,15 +136,8 @@ static int vmx_do_page_fault(unsigned long va, struct cpu_user_regs *regs) } #endif - /* - * If vpagetable is zero, then we are still emulating 1:1 page tables, - * and we should have never gotten here. - */ - if ( !test_bit(VMX_CPU_STATE_PG_ENABLED, &ed->arch.arch_vmx.cpu_state) ) - { - printk("vmx_do_page_fault while running on 1:1 page table\n"); - return 0; - } + if (!vmx_paging_enabled(current)) + handle_mmio(va, va); gpte = gva_to_gpte(va); if (!(l1e_get_flags(gpte) & _PAGE_PRESENT) ) @@ -399,7 +391,7 @@ static void vmx_io_instruction(struct cpu_user_regs *regs, vio = (vcpu_iodata_t *) d->arch.arch_vmx.vmx_platform.shared_page_va; if (vio == 0) { - VMX_DBG_LOG(DBG_LEVEL_1, "bad shared page: %lx", (unsigned long) vio); + printk("bad shared page: %lx", (unsigned long) vio); domain_crash_synchronous(); } p = &vio->vp_ioreq; @@ -423,7 +415,10 @@ static void vmx_io_instruction(struct cpu_user_regs *regs, laddr = (p->dir == IOREQ_WRITE) ? regs->esi : regs->edi; } p->pdata_valid = 1; - p->u.pdata = (void *) gva_to_gpa(laddr); + + p->u.data = laddr; + if (vmx_paging_enabled(d)) + p->u.pdata = (void *) gva_to_gpa(p->u.data); p->df = (eflags & X86_EFLAGS_DF) ? 1 : 0; if (test_bit(5, &exit_qualification)) /* "rep" prefix */ @@ -481,7 +476,7 @@ vmx_copy(void *buf, unsigned long laddr, int size, int dir) return 0; } - mfn = phys_to_machine_mapping(l1e_get_pfn(gva_to_gpte(laddr))); + mfn = phys_to_machine_mapping(laddr >> PAGE_SHIFT); addr = map_domain_mem((mfn << PAGE_SHIFT) | (laddr & ~PAGE_MASK)); if (dir == COPY_IN) @@ -570,6 +565,12 @@ vmx_world_restore(struct exec_domain *d, struct vmx_assist_context *c) error |= __vmwrite(CR0_READ_SHADOW, c->cr0); + if (!vmx_paging_enabled(d)) { + VMX_DBG_LOG(DBG_LEVEL_VMMU, "switching to vmxassist. 
use phys table"); + __vmwrite(GUEST_CR3, pagetable_val(d->domain->arch.phys_table)); + goto skip_cr3; + } + if (c->cr3 == d->arch.arch_vmx.cpu_cr3) { /* * This is simple TLB flush, implying the guest has @@ -578,7 +579,7 @@ vmx_world_restore(struct exec_domain *d, struct vmx_assist_context *c) */ mfn = phys_to_machine_mapping(c->cr3 >> PAGE_SHIFT); if ((mfn << PAGE_SHIFT) != pagetable_val(d->arch.guest_table)) { - VMX_DBG_LOG(DBG_LEVEL_VMMU, "Invalid CR3 value=%lx", c->cr3); + printk("Invalid CR3 value=%lx", c->cr3); domain_crash_synchronous(); return 0; } @@ -590,7 +591,7 @@ vmx_world_restore(struct exec_domain *d, struct vmx_assist_context *c) */ VMX_DBG_LOG(DBG_LEVEL_VMMU, "CR3 c->cr3 = %lx", c->cr3); if ((c->cr3 >> PAGE_SHIFT) > d->domain->max_pages) { - VMX_DBG_LOG(DBG_LEVEL_VMMU, "Invalid CR3 value=%lx", c->cr3); + printk("Invalid CR3 value=%lx", c->cr3); domain_crash_synchronous(); return 0; } @@ -605,6 +606,8 @@ vmx_world_restore(struct exec_domain *d, struct vmx_assist_context *c) __vmwrite(GUEST_CR3, pagetable_val(d->arch.shadow_table)); } +skip_cr3: + error |= __vmread(CR4_READ_SHADOW, &old_cr4); error |= __vmwrite(GUEST_CR4, (c->cr4 | X86_CR4_VMXE)); error |= __vmwrite(CR4_READ_SHADOW, c->cr4); @@ -731,18 +734,18 @@ static int vmx_set_cr0(unsigned long value) struct exec_domain *d = current; unsigned long old_base_mfn, mfn; unsigned long eip; + int paging_enabled; /* * CR0: We don't want to lose PE and PG. */ + paging_enabled = vmx_paging_enabled(d); __vmwrite(GUEST_CR0, (value | X86_CR0_PE | X86_CR0_PG)); + __vmwrite(CR0_READ_SHADOW, value); - if (value & (X86_CR0_PE | X86_CR0_PG) && - !test_bit(VMX_CPU_STATE_PG_ENABLED, &d->arch.arch_vmx.cpu_state)) { - /* - * Enable paging - */ - set_bit(VMX_CPU_STATE_PG_ENABLED, &d->arch.arch_vmx.cpu_state); + VMX_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx\n", value); + if ((value & X86_CR0_PE) && (value & X86_CR0_PG) + && !paging_enabled) { /* * The guest CR3 must be pointing to the guest physical. 
*/ @@ -750,8 +753,7 @@ static int vmx_set_cr0(unsigned long value) d->arch.arch_vmx.cpu_cr3 >> PAGE_SHIFT)) || !get_page(pfn_to_page(mfn), d->domain) ) { - VMX_DBG_LOG(DBG_LEVEL_VMMU, "Invalid CR3 value = %lx", - d->arch.arch_vmx.cpu_cr3); + printk("Invalid CR3 value = %lx", d->arch.arch_vmx.cpu_cr3); domain_crash_synchronous(); /* need to take a clean path */ } old_base_mfn = pagetable_get_pfn(d->arch.guest_table); @@ -776,8 +778,7 @@ static int vmx_set_cr0(unsigned long value) } else { if ((value & X86_CR0_PE) == 0) { __vmread(GUEST_EIP, &eip); - VMX_DBG_LOG(DBG_LEVEL_1, - "Disabling CR0.PE at %%eip 0x%lx", eip); + VMX_DBG_LOG(DBG_LEVEL_1, "Disabling CR0.PE at %%eip 0x%lx\n", eip); if (vmx_assist(d, VMX_ASSIST_INVOKE)) { set_bit(VMX_CPU_STATE_ASSIST_ENABLED, &d->arch.arch_vmx.cpu_state); @@ -838,7 +839,6 @@ static int mov_to_cr(int gp, int cr, struct cpu_user_regs *regs) switch(cr) { case 0: { - __vmwrite(CR0_READ_SHADOW, value); return vmx_set_cr0(value); } case 3: @@ -848,7 +848,7 @@ static int mov_to_cr(int gp, int cr, struct cpu_user_regs *regs) /* * If paging is not enabled yet, simply copy the value to CR3. 
*/ - if (!test_bit(VMX_CPU_STATE_PG_ENABLED, &d->arch.arch_vmx.cpu_state)) { + if (!vmx_paging_enabled(d)) { d->arch.arch_vmx.cpu_cr3 = value; break; } @@ -876,8 +876,7 @@ static int mov_to_cr(int gp, int cr, struct cpu_user_regs *regs) !VALID_MFN(mfn = phys_to_machine_mapping(value >> PAGE_SHIFT)) || !get_page(pfn_to_page(mfn), d->domain) ) { - VMX_DBG_LOG(DBG_LEVEL_VMMU, - "Invalid CR3 value=%lx", value); + printk("Invalid CR3 value=%lx", value); domain_crash_synchronous(); /* need to take a clean path */ } old_base_mfn = pagetable_get_pfn(d->arch.guest_table); @@ -1133,6 +1132,7 @@ asmlinkage void vmx_vmexit_handler(struct cpu_user_regs regs) VMX_DBG_LOG(DBG_LEVEL_0, "exit reason = %x", exit_reason); if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) { + printk("Failed vm entry\n"); domain_crash_synchronous(); return; } diff --git a/xen/arch/x86/vmx_io.c b/xen/arch/x86/vmx_io.c index 8fc73da632..dc18839d2d 100644 --- a/xen/arch/x86/vmx_io.c +++ b/xen/arch/x86/vmx_io.c @@ -465,7 +465,7 @@ void vmx_intr_assist(struct exec_domain *d) void vmx_do_resume(struct exec_domain *d) { vmx_stts(); - if ( test_bit(VMX_CPU_STATE_PG_ENABLED, &d->arch.arch_vmx.cpu_state) ) + if ( vmx_paging_enabled(d) ) __vmwrite(GUEST_CR3, pagetable_val(d->arch.shadow_table)); else // paging is not enabled in the guest diff --git a/xen/arch/x86/vmx_platform.c b/xen/arch/x86/vmx_platform.c index d346089051..0dfe6d855a 100644 --- a/xen/arch/x86/vmx_platform.c +++ b/xen/arch/x86/vmx_platform.c @@ -418,8 +418,12 @@ int inst_copy_from_guest(unsigned char *buf, unsigned long guest_eip, int inst_l } if ((guest_eip & PAGE_MASK) == ((guest_eip + inst_len) & PAGE_MASK)) { - gpte = gva_to_gpte(guest_eip); - mfn = phys_to_machine_mapping(l1e_get_pfn(gpte)); + if (vmx_paging_enabled(current)) { + gpte = gva_to_gpte(guest_eip); + mfn = phys_to_machine_mapping(l1e_get_pfn(gpte)); + } else { + mfn = phys_to_machine_mapping(guest_eip >> PAGE_SHIFT); + } ma = (mfn << PAGE_SHIFT) | (guest_eip & (PAGE_SIZE - 1)); 
inst_start = (unsigned char *)map_domain_mem(ma); @@ -508,7 +512,7 @@ static void send_mmio_req(unsigned long gpa, } else p->count = 1; - if (pvalid) + if ((pvalid) && vmx_paging_enabled(current)) p->u.pdata = (void *) gva_to_gpa(p->u.data); #if 0 diff --git a/xen/arch/x86/vmx_vmcs.c b/xen/arch/x86/vmx_vmcs.c index e840c4c3d0..8f1768602f 100644 --- a/xen/arch/x86/vmx_vmcs.c +++ b/xen/arch/x86/vmx_vmcs.c @@ -291,7 +291,7 @@ construct_init_vmcs_guest(struct cpu_user_regs *regs, /* Initally PG, PE are not set*/ shadow_cr = host_env->cr0; - shadow_cr &= ~(X86_CR0_PE | X86_CR0_PG); + shadow_cr &= ~X86_CR0_PG; error |= __vmwrite(CR0_READ_SHADOW, shadow_cr); /* CR3 is set in vmx_final_setup_guest */ error |= __vmwrite(GUEST_CR4, host_env->cr4); diff --git a/xen/arch/x86/x86_32/traps.c b/xen/arch/x86/x86_32/traps.c index 2070c8a49d..f545efcba4 100644 --- a/xen/arch/x86/x86_32/traps.c +++ b/xen/arch/x86/x86_32/traps.c @@ -20,8 +20,9 @@ void show_registers(struct cpu_user_regs *regs) unsigned long ss, ds, es, fs, gs, cs; unsigned long eip, esp, eflags; const char *context; - #ifdef CONFIG_VMX + unsigned long cr0, cr3; + if ( VMX_DOMAIN(current) && (regs->eflags == 0) ) { __vmread(GUEST_EIP, &eip); @@ -33,6 +34,8 @@ void show_registers(struct cpu_user_regs *regs) __vmread(GUEST_FS_SELECTOR, &fs); __vmread(GUEST_GS_SELECTOR, &gs); __vmread(GUEST_CS_SELECTOR, &cs); + __vmread(CR0_READ_SHADOW, &cr0); + __vmread(GUEST_CR3, &cr3); context = "vmx guest"; } else @@ -77,6 +80,9 @@ void show_registers(struct cpu_user_regs *regs) printk("ds: %04lx es: %04lx fs: %04lx gs: %04lx " "ss: %04lx cs: %04lx\n", ds, es, fs, gs, ss, cs); +#ifdef CONFIG_VMX + printk("cr0: %08lx cr3: %08lx\n", cr0, cr3); +#endif if ( GUEST_MODE(regs) ) show_guest_stack(); diff --git a/xen/include/asm-x86/shadow.h b/xen/include/asm-x86/shadow.h index 5b9eeaf17f..42a9c1e6f1 100644 --- a/xen/include/asm-x86/shadow.h +++ b/xen/include/asm-x86/shadow.h @@ -31,6 +31,9 @@ #include #include #include +#ifdef CONFIG_VMX 
+#include <asm/vmx.h> +#endif /* Shadow PT operation mode : shadow-mode variable in arch_domain. */ @@ -1672,8 +1675,8 @@ static inline void update_pagetables(struct exec_domain *ed) #ifdef CONFIG_VMX if ( VMX_DOMAIN(ed) ) - paging_enabled = - test_bit(VMX_CPU_STATE_PG_ENABLED, &ed->arch.arch_vmx.cpu_state); + paging_enabled = vmx_paging_enabled(ed); + else #endif // HACK ALERT: there's currently no easy way to figure out if a domU diff --git a/xen/include/asm-x86/vmx.h b/xen/include/asm-x86/vmx.h index d9bed6621b..a66ebb5f3f 100644 --- a/xen/include/asm-x86/vmx.h +++ b/xen/include/asm-x86/vmx.h @@ -294,5 +294,14 @@ static inline void vmx_stts() if (!(cr0 & X86_CR0_TS)) __vm_set_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_NM); } - + +/* Works only for ed == current */ +static inline int vmx_paging_enabled(struct exec_domain *ed) +{ + unsigned long cr0; + + __vmread(CR0_READ_SHADOW, &cr0); + return (cr0 & X86_CR0_PE) && (cr0 & X86_CR0_PG); +} + #endif /* __ASM_X86_VMX_H__ */ diff --git a/xen/include/asm-x86/vmx_vmcs.h b/xen/include/asm-x86/vmx_vmcs.h index 70a9c34594..7ccfac0b6b 100644 --- a/xen/include/asm-x86/vmx_vmcs.h +++ b/xen/include/asm-x86/vmx_vmcs.h @@ -29,7 +29,6 @@ extern void stop_vmx(void); void vmx_enter_scheduler(void); -#define VMX_CPU_STATE_PG_ENABLED 0 #define VMX_CPU_STATE_ASSIST_ENABLED 1 struct vmcs_struct { -- 2.30.2